Data import

library(jsonlite)
library(geojson)
## 
## Attaching package: 'geojson'
## The following object is masked from 'package:graphics':
## 
##     polygon
library(leaflet)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(ggplot2)
library(RColorBrewer)
library(stringr)
library(summarytools)
## 
## Attaching package: 'summarytools'
## The following objects are masked from 'package:Hmisc':
## 
##     label, label<-
library(stats)
library(leaflet.minicharts)

features <- read.csv("D:/Dropbox/DATA/2018_boat_survey/features.csv", stringsAsFactors = FALSE)
#features <- read.csv("C:/Users/Toby/Dropbox/DATA/2018_boat_survey/features.csv", stringsAsFactors = FALSE)
names(features)[names(features)=="LenghtKM"] <- "km"#renaming misspelled column
freqCoding <- read.csv("reference_tables/freqCoding.csv", stringsAsFactors = FALSE) #this file was created to quickly recode trip frequencies

data <- read.csv("D:/Dropbox/DATA/2018_boat_survey/data.csv", stringsAsFactors = FALSE)
#data <- read.csv("C:/Users/Toby/Dropbox/DATA/2018_boat_survey/data.csv", stringsAsFactors = FALSE)

Clean and mutate features.csv Each row in this file is associated with a track a respondent entered in the mapping tool of the survey (custom programmed in Qualtrics). First, we recode trip frequencies as stated by respondents in form of an interval. We take the mean of the interval for further analysis. Note, respondents stated trip frequencies for the 2018 summer season (current summer) and answered a hypothetical question how the number of trips would have changed would elodea clog some of the waterbodies they boated on.

#calculating pre elodea trip frequencies
features2 <- features%>%
  left_join(freqCoding,  by = c("trackFrequ"="Coding"))   #recoding trip frequencies
features2$freqPre <- round(rowMeans(features2[c("Lower","Upper")], na.rm=TRUE),digits=0)
names(features2)[names(features2)=="Lower"] <- "LowerPre" #Renaming columns for the lower bound of the frequency interval pre invasion
names(features2)[names(features2)=="Upper"] <- "UpperPre" #Renaming columns for the upper bound of the frequency interval pre invasion

#Calculating post elodea trip frequencies
features3 <- features2 %>%
  left_join(freqCoding,  by = c("trackFre_1"="Coding"))
features3$freqPost <- round(rowMeans(features3[c("Lower","Upper")], na.rm=TRUE), digits=0)
names(features3)[names(features3)=="Lower"] <- "LowerPost" #Renaming columns for the lower bound of the frequency interval post invasion
names(features3)[names(features3)=="Upper"] <- "UpperPost" #Renaming columns for the upper bound of the frequency interval post invasion

Calculating annual operating cost per route. Here, we calculate the annual km as the product of the stated trip frequency, round-trip length in km (calculated by ArcGIS), the stated hourly boat operating cost, and the inverse of the assumed average travel speed of 10 miles/h (16km/h). Both pre- and post-invasion costs are calculated.

#hCost: hourly operating cost
features3$hCost <- rowMeans(features3[c("hCostHigh","hCostLow")], na.rm=TRUE) #calculating the hCost interval mean for furthe ranalysis, ignore missing 
features3$hCost[features3$hCost==20000] <- NA  #outliers and missing value
#medianCost <- median(features3$hCost, na.rm=TRUE)  #calculating the median cost per h
#features3$hCost <- with(features3, ifelse(is.na(hCost), medianCost, hCost))  # inserting median cost for missing values
features3$hCost[is.nan(features3$hCost)] <- NA  #R tried to get the mean of the Lower and Upper values but if respondent left blank those are NA, so the mean is also NA

#outliers for hCost, turning into NAs anything above $100/h, one was $500/h which was marine based solely, two others freshwater based solely who were $125 and $150/h (likely entering errors)
features3$hCost <- with(features3, ifelse(hCost>100,NA,hCost))

#km: outliers, setting frequency of trips pre if Up to 4 setting it to 1 if km >300
features3$freqPre <- with(features3, ifelse(km>300 & trackFrequ=="Up to 4",1,freqPre))
#Outliers in trip frequencies
features3$freqPre <- with(features3, ifelse(freqPre>100,NA,freqPre))
features3$freqPost <- with(features3, ifelse(freqPost>100,NA,freqPre))

#rank of route
features3$rank <- with(features3, ifelse(rank==0, 1, rank)) #dealing with ranks that are 0

#YrCostPre and Post: calculating the total km per route then annual cost per route
features3$totalKmPre <- with(features3, km * freqPre)
features3$totalKmPost <- with(features3, km * freqPost)
features3$YrCostPre <- with(features3, totalKmPre * hCost)
features3$YrCostPost <- with(features3, totalKmPost * hCost)
#eliminating one marine route that is an outlier
features3<-features3[!features3$totalKmPre>10000,]

#turning hCost into NAs for YrCostPre exceeding income and few government related routes
features3$hCost <- with(features3, ifelse(YrCostPre>50000,NA,hCost))
#recalculating the above YrCostPre and YrCostPost
features3$totalKmPre <- with(features3, km * freqPre)
features3$totalKmPost <- with(features3, km * freqPost)
features3$YrCostPre <- with(features3, totalKmPre * hCost)
features3$YrCostPost <- with(features3, totalKmPost * hCost)

#adding up number of trips annually for each respondent across all of his/her routes
totalTripsByRes <- features3%>%
  group_by(responseID)%>%
  summarise(totalTripsPre = sum(freqPre),
            totalTripsPost = sum(freqPost),
            routeCount = n())%>%
  select(responseID, totalTripsPre, totalTripsPost, routeCount)
features3 <- features3 %>%
  left_join(totalTripsByRes, by="responseID")

Missing data, outliers, and recoding on other variables. Associating each route with either private, commercial, or government related operators.

#INCOME recoding, cleaning, and dealing with missing data on income
incCoding <- read.csv("reference_tables/incCoding.csv", stringsAsFactors = FALSE)
features4 <- features3%>%
  dplyr::left_join(incCoding,  by = "persIncomeBtax")
features4$Income <- as.numeric(as.character(features4$Income))
## Warning: NAs introduced by coercion
#imputing missing values for income
#medianIncome <- median(features4$Income, na.rm=TRUE)  #calc. median income
#features4$Income <- with(features4, ifelse(is.na(Income), medianIncome, Income))  #using median income for missing income data

#AGE: questionable data, deleting entries coded "Not applicable"
features4$age <- with(features4, ifelse(age==9 | age=="Not applicable",NA,age))
features4$age <- as.numeric(as.character(features4$age))

#ageBin
features4$ageBin <- with(features4, ifelse(age>20&age<=30,"21-30",ifelse(age>30&age<= 40,"31-40",ifelse(age>40&age<=50,"41-50", ifelse(age>50&age<=60,"51-60", ifelse(age>60&age<=70,"61-70", ifelse(age>70&age<=80, "71-80", ">80")))))))
features4$ageBin <- factor(features4$ageBin, levels=c("21-30", "31-40", "41-50", "51-60", "61-70", "71-80",">80"), labels=c("21-30", "31-40", "41-50", "51-60", "61-70", "71-80",">80")) #turning ageBin into factor

#Clean Drain Dry, coding respondents who did not respond to 
names(features4)[names(features4)=="CleanDrainDry"] <- "Clean"
features4$Clean <- with(features4, ifelse(Clean=="", "Did not report", ifelse(Clean=="half the time","50% of the time",ifelse(Clean=="Every time", "100% of the time",ifelse(Clean=="Never", "0% of the time", Clean)))))
features4$Clean <- factor(features4$Clean, levels=c("Did not report","0% of the time","25% of the time","50% of the time","75% of the time","100% of the time"), labels=c("Did not report","0% of the time","25% of the time","50% of the time","75% of the time","100% of the time")) 

#wasInElodea, clean, recode
#features4$wasInElodea<-str_replace_all(features$wasInElodea,"\\s","_")
features4$wasInElodea <- with(features4, ifelse(wasInElodea=="",NA,wasInElodea))
features4$wasInElodea <- with(features4, ifelse(wasInElodea=="I_cannot_remember","Cannot remember",wasInElodea))
features4$wasInElodea <- factor(features4$wasInElodea, levels=c("Yes", "No", "Did not report", "Cannot remember"), labels=c("Yes", "No", "Did not report", "Cannot remember"))

#freshOutside: recoding, turning into factor for graphing
features4$freshOutside <- with(features4, ifelse(freshOutside=="Yes","Yes",ifelse(freshOutside=="No","No","Did not report")))
features4$freshOutside <- factor(features4$freshOutside, levels=c("Yes", "No", "Did not report"), labels=c("Yes", "No", "Did not report"))

#PAX cleaning
features4$pax <- with(features4, ifelse(pax=="none"|pax=="43134"|pax=="",NA,pax))
features4$pax <- as.numeric(as.character(features4$pax))

#USER GROUP, TYPE of OPERATOR 
features4$percentCom <- gsub("[[:punct:]]", " ", features4$percentCom)  #getting rid of % sign in character string
features4$percentCom <- as.numeric(as.character(features4$percentCom))  #converting character to numeric
features4$percentCom <- with(features4, ifelse(is.na(percentCom),0,percentCom/100))
features4$percentGov <- gsub("[[:punct:]]", " ", features4$percentGov)
features4$percentGov <- as.numeric(as.character(features4$percentGov))
features4$percentGov <- with(features4, ifelse(is.na(percentGov),0,percentGov/100))
features4$percentPer <- with(features4, 1-percentGov-percentCom)

#correcting wrong respondent entry on percentGov
features4$percentGov <- with(features4, ifelse(percentPer<0,percentGov-percentCom,percentGov))
features4$percentPer <- with(features4, 1-percentGov-percentCom)

features4$type <- with(features4, ifelse(percentPer>0.5, "personal", ifelse(percentCom>0.5, "commercial", "government")))
#Creating subsets by type
featuresCom <- subset(features4, type=="commercial")
featuresPers <- subset(features4, type=="personal")
featuresGov <- subset(features4, type=="government")

Preparing features file for leaflet shiny app

#Consolidating levels for several categorical variables which will be used to subset the tracks that are shown. note, leaves the class of these variables as character 
featuresApp <- features4
featuresApp$wasInElodea <- with(featuresApp, ifelse(wasInElodea=="Yes","Yes","No"))
featuresApp$freshOutside <- with(featuresApp, ifelse(freshOutside=="Yes","Yes","No or did not report"))
featuresApp$Clean <- with(featuresApp, ifelse(Clean=="100% of the time","100% of the time","Less than 100% of the time"))

write.csv(featuresApp, "boater_app/data/featuresApp.csv" )

#creating geojson file combining data.csv with features4.csv 
#system("C:/Users/Toby/Documents/ANALYSIS_R/boatmovement/appdir/data> python.exe .\map-data-joiner.py .\map.geojson .\features4.csv .\joined-output.geojson")

Clean and mutate data.csv file

#trimming % sign in character string, then converting character to numeric, doing for percentCom and percentGov
data2 <- data
data2$percentCom <- gsub("[[:punct:]]", " ", data2$percentCom)
data2$percentCom <- as.numeric(as.character(data2$percentCom))
data2$percentCom <- with(data2, ifelse(is.na(percentCom),0,percentCom/100))
data2$percentGov <- gsub("[[:punct:]]", " ", data2$percentGov)
data2$percentGov <- as.numeric(as.character(data2$percentGov))
data2$percentGov <- with(data2, ifelse(is.na(percentGov),0,percentGov/100))
data2$percentPer <- with(data2, 1-percentGov-percentCom)

#correcting wrong respondent entry on percentGov
data2$percentGov <- with(data2,ifelse(percentPer<0,percentGov-percentCom,percentGov))
data2$percentPer <- with(data2, 1-percentGov-percentCom)

#renaming columns
names(data2)[names(data2)=="CleanDrainDry"] <- "Clean"

#INCOME
incCoding <- read.csv("reference_tables/incCoding.csv", stringsAsFactors = FALSE)
data3 <- data2%>%
  dplyr::left_join(incCoding,  by = "persIncomeBtax")
data3$Income <- as.numeric(as.character(data3$Income))
## Warning: NAs introduced by coercion
medianIncome <- median(data3$Income, na.rm=TRUE)  #dealing with missing data on income using the median
data3$Income <- with(data3, ifelse(is.na(Income), medianIncome, Income))

#AGE: questionable data on age and deleting entries coded "Not applicable"
data3$age <- with(data3, ifelse(age<18 | age=="Not applicable",NA,age))
data3$age <- as.numeric(as.character(data3$age))
## Warning: NAs introduced by coercion
data4 <- data3
#ageBin
data4$ageBin <- with(data4, ifelse(age>20&age<=30,"21-30",ifelse(age>30&age<= 40,"31-40",ifelse(age>40&age<=50,"41-50", ifelse(age>50&age<=60,"51-60", ifelse(age>60&age<=70,"61-70", ifelse(age>70&age<=80, "71-80", ifelse(is.na(age),NA, ">80"))))))))
data4$ageBin <- factor(data4$ageBin, levels=c("21-30", "31-40", "41-50", "51-60", "61-70", "71-80",">80"), labels=c("21-30", "31-40", "41-50", "51-60", "61-70", "71-80",">80")) #turning ageBin into factor

#Clean Drain Dry
data4$Clean <- with(data4, ifelse(Clean=="", "Did not report", ifelse(Clean=="half the time","50% of the time",ifelse(Clean=="Every time", "100% of the time",ifelse(Clean=="Never", "0% of the time", Clean)))))

data4 <- subset(data4, select = -c(IPAddress, operateType, persIncomeBtax)) #cleaning up columns no longer needed

#TYPE
data4$type <- with(data4, ifelse(percentPer>0.5, "personal", ifelse(percentCom>0.5, "commercial", "government")))

#wasInElodea
data5 <- data4
data5$wasInElodea <- with(data5, ifelse(wasInElodea=="", NA, wasInElodea))
data5$wasInElodea <- with(data5, 
                          ifelse(stringr::str_detect(wasInElodea,"I DID NOT operate a boat in any of the waterbodies listed"), "No",
                                ifelse(stringr::str_detect(wasInElodea,"I cannot remember"), "Cannot remember", "Yes")))
#freshOutside
data5$freshOutside <- with(data5, ifelse(freshOutside=="Yes","Yes",ifelse(freshOutside=="No","No","Did not report")))
#data5$freshOutside <- factor(data5$freshOutside, levels=c("Yes", "No", "Did not report"), labels=c("Yes", "No", "Did not report"))

data5$Clean <- factor(data5$Clean, levels=c("Did not report","0% of the time","25% of the time","50% of the time","75% of the time","100% of the time"), labels=c("Did not report","0% of the time","25% of the time","50% of the time","75% of the time","100% of the time"))  #need factor for later plot

data5$purchaseType <- with(data5, ifelse(purchaseType=="","Not known",purchaseType)) 
data5$purchaseLoc <- with(data5, ifelse(purchaseLoc=="","Not known",purchaseLoc)) 
# creating subsets for each type of operator
dataCom <- subset(data5, type=="commercial")
dataPers <- subset(data5, type=="personal")
dataGov <- subset(data5, type=="government")

Preparing data file for leaflet shiny app

dataApp <- data5
dataApp$wasInElodea <- with(dataApp, ifelse(wasInElodea=="Yes","Yes","No"))
dataApp$freshOutside <- with(dataApp, ifelse(freshOutside=="Yes","Yes","No or did not report"))
dataApp$Clean <- with(dataApp, ifelse(Clean=="100% of the time","100% of the time","Less than 100% of the time"))

#Writing data file into appdir for the leaflet shiny app
write.csv(dataApp, "boater_app/data/dataApp.csv" )

Survey analysis

1. Response

Of the 5095 mailed envelopes, 3.6% were undeliverable for an effective sample size of 4914. Of these the survey received 965 responses for a response rate of 19.6%. The effective sample contained 220 business contacts which resulted in 21 business responses. Some respondents (0.5%) reported to have online difficulties with mapping their boat tracks and 0.3% reported to not have Internet. 21 respondents returned the $1 bill by mailing the bill back to the research team. Two respondents requested to be taken off the mailing list for the reminder mailings.

Of the 965 respondents, 36 percent solely operated in marine waters and consequently were not asked to provide mapping detail of their 2018 boat tracks. 29 percent (n=280) of respondents solely operated in freshwater, while 18 percent (n=175) operated in both fresh and marine waters which explains why the resulting geographic information on boat tracks includes tracks in marine waters. 15 percent reported no to have used their boat in 2018.

#Number of boaters in each user group who responded
data5$operateFresh1 <- with(data5, ifelse(operateFresh=="Yes"&operateMarine=="No",1,0))
data5$operateMarine1 <- with(data5, ifelse(operateMarine=="Yes"&operateFresh=="No",1,0)) #no data collection for marine boaters
data5$didNotOperate1 <- with(data5, ifelse(operateMarine=="No"&operateFresh=="No",1,0))
data5$operateBoth1 <- with(data5, ifelse(operateMarine=="Yes"&operateFresh=="Yes", 1,0)) #consequently both, marine and fresh water routes were collected
data5$User <- with(data5, ifelse(operateFresh1==1,"Only freshwater",ifelse(operateMarine1==1,"Only marine",ifelse(operateBoth1==1,"Fresh and marine","Did not operate"))))

userGroups <- data5%>%
  summarise(FreshOnly = round((sum(operateFresh1)/nrow(data5))*100,digits=0),
            MarineOnly = round((sum(operateMarine1)/nrow(data5))*100,digits=0),
            Both = round((sum(operateBoth1)/nrow(data5))*100,digits=0),
            DidNot = round((sum(didNotOperate1)/nrow(data5))*100,digits=0),
            FreshOnlyCount = sum(operateFresh1),
            BothCount = sum(operateBoth1))

Tables: Number of respondents with and without mapping answers

maprespondents <- features4 %>%
  distinct(responseID, LocationLatitude, LocationLongitude) %>% 
  drop_na()

data5 <- data5%>%
  left_join(maprespondents, by = "responseID")
data5$mapres <- with(data5, ifelse(!is.na(LocationLatitude.y),1,0))
data6 <- select(data5, -c("LocationLatitude.y","LocationLongitude.y"))

mapResponse <- subset(data6, mapres==1)
mapNonRes <- subset(data6, mapres==0)

#Summary of respondents with mapping response
mapResSummary <- mapResponse%>%
  group_by(User)%>%
    summarise(median.min = round(median(Duration.s)/60, digits = 1),
            count = n(),
            elodeaCount = sum(wasInElodea=="Yes", na.rm = TRUE))
knitr::kable(mapResSummary, caption = "Summary of respondents with mapping response, n=322")%>%
  kableExtra::kable_styling()
## Warning in kableExtra::kable_styling(.): Please specify format in kable.
## kableExtra can customize either HTML or LaTeX outputs. See https://
## haozhu233.github.io/kableExtra/ for details.
Summary of respondents with mapping response, n=322
User median.min count elodeaCount
Did not operate 9.2 4 0
Fresh and marine 14.0 115 15
Only freshwater 12.4 203 28
#Summary of respondents without mapping response
non.mapResSummary <- mapNonRes%>%
  group_by(User)%>%
  summarise(median.min = round(median(Duration.s)/60, digits = 1),
            count = n(),
            elodeaCount = sum(wasInElodea=="Yes", na.rm = TRUE))
knitr::kable(non.mapResSummary, caption = "Summary of respondents without mapping response, n=643")%>%
  kableExtra::kable_styling()
Summary of respondents without mapping response, n=643
User median.min count elodeaCount
Did not operate 0.7 156 0
Fresh and marine 9.6 60 6
Only freshwater 6.9 77 11
Only marine 0.9 350 0

1.2 Representativeness of the sample

We conducted an independent t-test to see how representative respondents to the survey were in comparison to the general Alaska population. Ideally we had hoped to compare the sample to the sample frame (population) of registered boaters but there is no numeric variable that could be of use to conduct such a comparison. The only available metric to conduct this comparison is personal income reported in the survey which can be compared to the personal income (PINCP variable) in the PUMS dataset for Alaska associated with the 2017 American Community Survey. As to be expected, the result of the t-test shows dissimilarity in samples (p<0.000, t=47.7, df=2313.3). The mean income is much higher in the sample (91,398) compared to the observed population mean in the ACS (38,624) The Figure below shows the sample distribution of income in red while the ACS income distribution is shown in black.

PUMS <- read.csv("reference_tables/PUMS.csv", stringsAsFactors = F)
#F-test if sample variance and ACS variance are the same
var.test(data6$Income, PUMS$PINCP, alternative = "two.sided")  # using PINCP - total person's income closely related to "personal income" the survey asked.  F-test result: variances are not equal, so we need to conduct independent t-test assuming unequal variances.
## 
##  F test to compare two variances
## 
## data:  data6$Income and PUMS$PINCP
## F = 0.31147, num df = 964, denom df = 5159, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2830600 0.3438708
## sample estimates:
## ratio of variances 
##           0.311475
d1 <- density(data6$Income, na.rm=T)
d2 <- density(PUMS$PINCP, na.rm=T)
plot(d1, col="red", main="Income distributions comparing sample to population",xlab="2017 personal income")
lines(d2)

#R assumes unequal variances by default, which is the Welch two-sample t-test
t.test(data6$Income, PUMS$PINCP, paired=FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  data6$Income and PUMS$PINCP
## t = 47.714, df = 2313.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  50605.88 54943.87
## sample estimates:
## mean of x mean of y 
##  91398.96  38624.09

INSERT age and income of entire sample

1.3 Geographic representativeness

The Qualtrics online survey software collects respondent information such as IP addresses which can be used to determine the location the respondent was in when answering the survey. If the respondent completed the survey using the Qualtrics Offline App on a GPS-enabled device, this data will be an accurate representation of the respondent’s location. For all other respondents, the location is an approximation determined by comparing the participant’s IP address to a location database. Inside the United States, this data is typically accurate to the city level.

There was considerable representation of out-of-state responses as the following maps show.

Figure: Map showing clustered locations for the 965 respondents

#creating table of unique respondents and locations 
locations <- data4 %>% 
  distinct(responseID, LocationLatitude, LocationLongitude) %>% 
  drop_na()

data.table::data.table(locations)
##             responseID LocationLatitude LocationLongitude
##   1: R_1FkCiQmxzOZRDev          61.1454         -149.7664
##   2: R_2ts39neDOF8rHk1          61.1886         -149.8878
##   3: R_aboh3Edrotju1Gh          61.1454         -149.7664
##   4: R_xsZTdg5cBYfKlMJ          61.1120         -149.9044
##   5: R_eY9qsmW6UmKC2ml          61.2231         -149.8528
##  ---                                                     
## 844: R_2bW4mALpiIqq7Q5          61.5923         -149.3959
## 845: R_1Kqbe6xyjnwgmlz          61.5923         -149.3959
## 846: R_2yj9zpP30SGiMwR          61.1535         -149.8289
## 847: R_1FKAn4a4lsZL4Bc          64.9109         -147.5105
## 848: R_tKcIMsebXuXzP69          47.4483         -122.2731
leaflet(locations)%>%
    addTiles()%>%
    addProviderTiles(providers$Esri.WorldTopoMap)%>%
    addMarkers(lat = ~LocationLatitude, lng = ~LocationLongitude, 
    clusterOptions = markerClusterOptions(), popup = ~responseID)

FIX THIS CODE

Figure: Map of respondents and where they reside color coded by whether repondents gave mapping answers or did not boat (n=848)

vars <- c("mapres","responseID","LocationLongitude.x","LocationLatitude.x","didNotOperate1")
respondentLocations <- data6[vars]
#eliminting respondents where we could not get location information
respondentLocations <- subset(respondentLocations, !is.na(LocationLongitude.x))
respondentLocations$marker <- with(respondentLocations, ifelse(mapres==1, 1, ifelse(mapres==0&didNotOperate1==0,0,2)))

getColor <- function(respondentLocations) {
  sapply(respondentLocations$marker, function(marker) {
  if(marker==1) {
    "green"
  } else if(marker==0) {
    "red"
  } else {
    "blue"
  } })
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(respondentLocations)
)

leaflet(respondentLocations)%>%
    addTiles()%>%
    addProviderTiles(providers$Esri.WorldTopoMap)%>%
    addAwesomeMarkers(~LocationLongitude.x, ~LocationLatitude.x, icon=icons, label=~as.character(mapres))
#%>%
    #addLegend("topright", colors = c("green","red", "blue"), labels = c("gave map response","no map response","did not boat"))

2. Elodea and other AIS concern

Table: Respondent count for boat owners who took their boat into elodea infested waterbodies in 2018, n=51

elodeaLocFreq <- features4%>%
  distinct(responseID, .keep_all=T)%>%  #Note, important to set the dot in front of keep_all!!!
  filter(wasInElodea=="Yes")%>%
  select(responseID, elodeaLoc1, elodeaLoc2, elodeaLoc3)%>% 
  gather(type, value=location, elodeaLoc1, elodeaLoc2, elodeaLoc3)%>%
  filter(location!="I cannot remember"&location!=""&location!="Campbell Lake")%>%  #taking out blanks and miscoding
  group_by(location)%>%
  summarise(count=n())

knitr::kable(elodeaLocFreq, caption = "Count of respondents visiting waterbodies with known elodea infestations in 2018, n=51")%>%
  kableExtra::kable_styling()
Count of respondents visiting waterbodies with known elodea infestations in 2018, n=51
location count
Alaganik Slough 3
Chena Lake 2
Chena River 34
Chena Slough 4
Eyak Lake 6
Eyak River 8
Lake Hood 1
Sand Lake 1
Sports Lake 1
Stormy Lake 3
Totchaket Slough 3

2.1 Elodea transmission risk map

Figure: Respondents’ (n=324) boat routes and locations of known elodea occurrence

#importing elodea presence data, and boating tracks
library(geojsonio)
## 
## Attaching package: 'geojsonio'
## The following object is masked from 'package:jsonlite':
## 
##     validate
## The following object is masked from 'package:base':
## 
##     pretty
library(leaflet)
elodea <- read.csv("boater_app/data/elodea.csv", stringsAsFactors = FALSE)
elodeaTRUE <- subset(elodea, presence=="TRUE")
Alltracks <- geojson_read("boater_app/data/map.geojson",what = "sp")
## Warning in rgdal::readOGR(input, rgdal::ogrListLayers(input), verbose =
## FALSE, : Dropping null geometries: 156, 555, 684
NotClean <- geojson_read("boater_app/data/NotClean.geojson",what = "sp")
## Warning in rgdal::readOGR(input, rgdal::ogrListLayers(input), verbose =
## FALSE, : Dropping null geometries: 223, 312
#Interactive layer display
clickMap1 <- leaflet(elodeaTRUE, width = "80%", height = 500) %>%
  setView(-147.3, 62.0, zoom = 5)%>%
  #Base groups
  addTiles(group = "OpenStreetMap") %>%
  addProviderTiles(providers$Esri.WorldTopoMap, group = "Esri Topo")%>%
  addProviderTiles(providers$Esri.WorldImagery, group = "Esri Image")%>%
  #Overlay groups
  addCircleMarkers(lat = ~latitude, lng = ~longitude, ~log(infested_area)*10, stroke = F, 
                   color="#d95f02", weight = 5, group="Elodea infestation" )%>%
  addPolylines(data=Alltracks, stroke = TRUE, color = "#1b9e77", weight = 2,
               opacity = 1.0, fill = FALSE, group = "All boat tracks (green)")%>%
  addPolylines(data=NotClean, stroke = TRUE, color = "#7570b3", weight = 2,
               opacity = 1.0, fill = FALSE, group = "No clean-drain-dry (purple)")%>%
  #Layers control
  addLayersControl(
    baseGroups = c("Esri Topo", "Esri Image","OpenStreetMap"),
    overlayGroups = c("Elodea infestation", "All boat tracks","No clean drain dry"),
    options = layersControlOptions(collapsed = FALSE)
  )
clickMap1

Table: Descriptive statistics related AIS concern for specific sectors, n=372

concSummary <- features4 %>%
  distinct(responseID, .keep_all=T)%>%
  select(sportfishConc,boatingSafetyConc,recValueConc,businessConc,realEstateConc,subsistConc,biodivConc,commFishConc) # select variables to summarise

tidyCon <- summarytools::descr(concSummary, stats = c("min","med","max","pct.valid"))
Statistic <- row.names(tidyCon) # extracting row names from the matrix above
tidyCon<- data.frame(tidyCon)%>% 
  mutate_at(1:8, funs(round(.,0)))
tidyCon <-cbind(Statistic, tidyCon) #adding variable names back in

names(tidyCon)[names(tidyCon)=="sportfishConc"] <- "Sport fishing"#renaming column
names(tidyCon)[names(tidyCon)=="boatingSafetyConc"] <- "Boating safety"
names(tidyCon)[names(tidyCon)=="recValueConc"] <- "Recreation value"
names(tidyCon)[names(tidyCon)=="businessConc"] <- "Businesses"
names(tidyCon)[names(tidyCon)=="realEstateConc"] <- "Real estate values"
names(tidyCon)[names(tidyCon)=="subsistConc"] <- "Subsistence"
names(tidyCon)[names(tidyCon)=="biodivConc"] <- "Biodiversity"
names(tidyCon)[names(tidyCon)=="commFishConc"] <- "Commercial fishing"

knitr::kable(tidyCon, caption = "Summary statistics for all rspondents' AIS concerns in specific sectors, n=324")%>%
  kableExtra::kable_styling()
Summary statistics for all rspondents’ AIS concerns in specific sectors, n=324
Statistic Sport fishing Boating safety Recreation value Businesses Real estate values Subsistence Biodiversity Commercial fishing
Min 1 1 1 1 1 1 1 1
Median 5 2 4 3 2 4 4 3
Max 5 5 5 5 5 5 5 5
Pct.Valid 75 64 73 64 68 70 71 66

2.2 Alaska boat owners’ boating and boat purchases occurring outside of Alaska

Table: Boat owner count for owners who took their boat out of state before boating in Alaska in 2018

#Total distance, total trips, total respondents, and weighted average distance per trip by age group in the personal user group
outsidersSummary <- features4%>%
  group_by(locOutside, freshOutside)%>%
  summarise(count = n_distinct(responseID),
            percentOfN = count/333)%>%
  na.omit()

knitr::kable(outsidersSummary, caption = "Percentage of boat owners boating outside of Alaska in 2018")%>%
  kableExtra::kable_styling()
Percentage of boat owners boating outside of Alaska in 2018
locOutside freshOutside count percentOfN
No 307 0.9219219
Did not report 59 0.1771772
British Columbia Yes 1 0.0030030
Kansas Yes 1 0.0030030
Washington Yes 1 0.0030030
Yukon Territories Yes 1 0.0030030

Table: Boat owner characteristics who boated outside Alaska before boating in Alaska

outsideSummary <- features4 %>%
  distinct(responseID, .keep_all=T)%>%
  filter(freshOutside=="Yes")%>%
  select(Income, age, pax, routeCount,totalKmPre,hCost, YrCostPre, totalTripsPre, totalTripsPost) # select variables to summarise

Summary <- summarytools::descr(outsideSummary, stats = c("mean", "sd", "cv","min","q1","med","q3","max","pct.valid"))
Statistic <- row.names(Summary) # extracting row names from the matrix above
Summary<- data.frame(Summary)%>% 
  mutate_at(1:9, funs(round(.,0)))
Summary <-cbind(Statistic, Summary) #adding variable names back in

names(Summary)[names(Summary)=="age"] <- "Age"#renaming misspelled column
names(Summary)[names(Summary)=="hCost"] <- "Operating cost $/h"
names(Summary)[names(Summary)=="YrCostPre"] <- "Operating cost $/year"
names(Summary)[names(Summary)=="totalTripsPre"] <- "Trips/year"
names(Summary)[names(Summary)=="totalKmPre"] <- "Km/year"
names(Summary)[names(Summary)=="totalTripsPost"] <- "Trips/year contingent"
names(Summary)[names(Summary)=="pax"] <- "Passengers"
names(Summary)[names(Summary)=="routeCount"] <- "Number of unique routes"

knitr::kable(Summary, caption = "Summary statistics for operators who took their boats outside Alaska in 2018, n=4")%>%
  kableExtra::kable_styling()
Summary statistics for operators who took their boats outside Alaska in 2018, n=4
Statistic Income Age Passengers Number of unique routes Km/year Operating cost $/h Operating cost $/year Trips/year Trips/year contingent
Mean 75000 52 2 2 91 25 2590 4 4
Std.Dev 43301 20 1 2 37 11 2141 4 4
CV 1 0 0 1 0 0 1 1 1
Min 37500 29 1 1 53 15 933 2 2
Q1 37500 35 2 1 63 16 1005 2 2
Median 75000 54 2 1 87 22 1946 2 2
Q3 112500 68 2 3 120 34 4175 6 6
Max 112500 70 3 5 138 40 5533 10 10
Pct.Valid 100 100 100 100 100 100 100 100 100

Table: Outside purchases of boats

purOutside <- data6%>%
  filter(purchaseOutside=="Yes")%>%
  group_by(purchaseType,purchaseYear)%>%
  summarise(count = n())

h1 <- purOutside
h2 <- expand.grid(purchaseType=unique(h1$purchaseType), purchaseYear=1982:2018)
h3 <- merge(h2,h1, by.x=c("purchaseType", "purchaseYear"),by.y=c("purchaseType", "purchaseYear"), all.x = TRUE)
h3[is.na(h3)]<-0

h3plot <- ggplot(h3, aes(x = purchaseYear, y = count, fill = purchaseType)) +
  geom_bar(stat = 'identity', aes(fill = purchaseType)) +
  labs(x="Year",y="Count", title="Historical purchases of boats from outside Alaska, n=459") + 
  scale_fill_discrete(name = "Purchase type") + theme_bw()
h3plot

Map: States where boats were purchased by type of purchase

purLoc <- data6%>%
  filter(purchaseOutside=="Yes")%>%
  group_by(purchaseLoc,purchaseType)%>%
  summarise(n = n())%>%
  mutate(percent = n/sum(n))
#eliminating unknown locations
purLoc <- purLoc[!purLoc$purchaseLoc=="Not known",]

purLoc <- subset(purLoc, select = -n) #cleaning up columns no longer needed
  purLoc<-purLoc%>%
  spread(key=purchaseType, value=percent)

#adding georeferences
locs_center <- read.csv("reference_tables/locs_center.csv", stringsAsFactors = F)
purLocGeo <- purLoc%>%
  left_join(locs_center, by=c("purchaseLoc"="state"))
purLocGeo[is.na(purLocGeo)] <- 0

#Minichart map for proportion of used new boats being bought
library(leaflet)

basemap <- leaflet(width = "100%", height = "650px") %>%
  addTiles()%>%
  addProviderTiles(providers$OpenStreetMap.Mapnik)

colors <- c("#1b9e77","#d95f02","#7570b3")

basemap %>%
  addMinicharts(
    purLocGeo$long, purLocGeo$lat,
    type = "pie",
    chartdata = purLocGeo[, c("New","Not known","Used")], 
    colorPalette = colors, 
    width = 50 , transitionTime = 0)

2.3 Clean Drain Dry

Figure: Stated percentage of time respondent followed clean-drain-dry procedures to minimize transmission of aquatic invasive species by age and annual distance

#dealing with 50,000 total km of boat travel outlier
pers3 <- featuresPers%>%
  subset(totalKmPre < 2000 &
         Clean != "Did not report")

#colors for plot
theme <- c('#a63603','#e6550d','#fd8d3c','#fdae6b','#fdd0a2','#feedde')
#plot
ggplot(pers3, aes(age, totalKmPre, colour = Clean)) + 
  geom_point() +
  geom_hline(aes(yintercept = 0)) + ylim(0,2000) + ylab("km / year")+
  labs(colour = "CleanDrainDry") + scale_color_manual(values=theme) 
## Warning: Removed 37 rows containing missing values (geom_point).

Figure: Stated percentage of time respondent followed clean-drain-dry procedure to minimize transmission of aquatic invasive species by age group (n=312).

persAgeBinClean <- dataPers %>%
    filter(!is.na(ageBin)) %>%
    group_by(ageBin, Clean) %>%
    summarise(n = n())
#965 total respondents, 302 reported age and Clean, 639 did not report Clean, 653 not reporting age
#plotting proportions of clean drain dry proportions by age bin
library(ggplot2)
plot1 <- ggplot(persAgeBinClean, aes(x = ageBin, y = n, fill = Clean)) +
  geom_bar(stat = 'identity', position = 'fill', aes(fill = Clean)) +
  labs(x="age group",y="proportion") + 
  scale_fill_discrete(name = "Clean Drain Dry") + theme_bw() 

plot2 <- ggplot(persAgeBinClean, aes(x = ageBin, y = n, fill = Clean, label = n)) +
  geom_bar(stat = 'identity', aes(fill = Clean)) +
  labs(x="age group",y="count") + 
  scale_fill_discrete(name = "Clean Drain Dry") + 
  geom_text(aes(y = n, label = n),size = 3, check_overlap = TRUE, position = position_stack(vjust = 0.5)) + 
  theme_bw()

ggpubr::ggarrange(plot1, plot2, nrow=1, ncol=2, common.legend = TRUE, legend = "right")

Boat trip characteristics

Table: Total distance, total trips, total respondents, and weighted average distance per trip by age group in the personal user group

ageBinSummary <- featuresPers%>%
  subset(totalKmPre < 2000)%>%
  group_by(ageBin)%>%
  summarise(count = n_distinct(responseID),
            sumTotalKm = round(sum(totalKmPre),digits =0),
            sumTrips  = sum(freqPre),
            wghtAvgDis = round(sumTotalKm/sumTrips, digits=1))
knitr::kable(ageBinSummary, caption = "Total distance, trips, respondent count, and weighted average distance per trip by age group")%>%
  kableExtra::kable_styling()
Total distance, trips, respondent count, and weighted average distance per trip by age group
ageBin count sumTotalKm sumTrips wghtAvgDis
21-30 6 4858 117 41.5
31-40 37 20102 576 34.9
41-50 56 15108 678 22.3
51-60 75 35243 1223 28.8
61-70 81 16764 748 22.4
71-80 24 4362 163 26.8
>80 1 925 16 57.8
NA 73 26078 1233 21.2

Table: Personal trip characteristics by age group

#Total distance, total trips, total respondents, and weighted average distance per trip by age group in the personal user group
ageBinSummary <- featuresPers%>%
  subset(totalKmPre < 2000)%>%
  group_by(ageBin)%>%
  summarise(count = n_distinct(responseID),
            sumTotalKm = round(sum(totalKmPre),digits =0),
            sumTrips  = sum(freqPre),
            wghtAvgDis = round(sumTotalKm/sumTrips, digits=1))
knitr::kable(ageBinSummary, caption = "Total distance, trips, respondent count, and weighted average distance per trip by age group")%>%
  kableExtra::kable_styling()
Total distance, trips, respondent count, and weighted average distance per trip by age group
ageBin count sumTotalKm sumTrips wghtAvgDis
21-30 6 4858 117 41.5
31-40 37 20102 576 34.9
41-50 56 15108 678 22.3
51-60 75 35243 1223 28.8
61-70 81 16764 748 22.4
71-80 24 4362 163 26.8
>80 1 925 16 57.8
NA 73 26078 1233 21.2

Table: Statistics related to the main purpose of routes

StatsByPur <- features4 %>%
  dplyr::group_by(primaryPur)%>%
  summarise(routeCount = n(),
            percentAllRoutes = round((n()/nrow(features4))*100,digits=0),
            meanRank = round(mean(rank),digits=0),
            meanKm = round(mean(km),digits=0),
            minKm = round(min(km),digits=0),
            maxKm= round(max(km), digits=0))%>%
  arrange(desc(routeCount))%>%
  na.omit()

names(StatsByPur)[names(StatsByPur)=="primaryPur"] <- "Primary purpose"
names(StatsByPur)[names(StatsByPur)=="routeCount"] <- "Route count"#renaming column
names(StatsByPur)[names(StatsByPur)=="percentAllRoutes"] <- "% of all routes"
names(StatsByPur)[names(StatsByPur)=="meanRank"] <- "Mean rank"
names(StatsByPur)[names(StatsByPur)=="meanKm"] <- "Average route in km"
names(StatsByPur)[names(StatsByPur)=="minKm"] <- "Shortest route in km"
names(StatsByPur)[names(StatsByPur)=="maxKm"] <- "Longest route in km"

knitr::kable(StatsByPur, caption = "Summary statistics for the primary purpose of routes, n=324")%>%
  kableExtra::kable_styling()
Summary statistics for the primary purpose of routes, n=324
Primary purpose Route count % of all routes Mean rank Average route in km Shortest route in km Longest route in km
Sport Fishing 384 50 2 42 0 497
Other 149 20 3 40 0 675
Cabin 68 9 1 27 0 139
Hunting 50 7 2 74 3 400
Subsistence Fishing 45 6 2 53 1 478
Camping 25 3 2 153 1 883
Wildlife watching 25 3 2 58 1 257
Hiking 5 1 4 62 18 90
Bird watching 4 1 1 22 0 55
Lodge 2 0 2 71 57 86
Mining 1 0 2 36 36 36

Table: Descriptive statistics related to personal operators who gave mapping answers, n=324

dataSummary <- features4 %>%
  distinct(responseID, .keep_all=T)%>%
  filter(type=="personal")%>%
  select(Income, age, pax, routeCount,totalKmPre,hCost, YrCostPre, totalTripsPre, totalTripsPost) # select variables to summarise

tidySummary <- summarytools::descr(dataSummary, stats = c("mean", "sd", "cv","min","q1","med","q3","max","pct.valid"))
Statistic <- row.names(tidySummary) # extracting row names from the matrix above
tidySummary<- data.frame(tidySummary)%>% 
  mutate_at(1:9, funs(round(.,0)))
tidySummary <-cbind(Statistic, tidySummary) #adding variable names back in

names(tidySummary)[names(tidySummary)=="age"] <- "Age"#renaming misspelled column
names(tidySummary)[names(tidySummary)=="hCost"] <- "Operating cost $/h"
names(tidySummary)[names(tidySummary)=="YrCostPre"] <- "Operating cost $/year"
names(tidySummary)[names(tidySummary)=="totalTripsPre"] <- "Trips/year"
names(tidySummary)[names(tidySummary)=="totalKmPre"] <- "Km/year"
names(tidySummary)[names(tidySummary)=="totalTripsPost"] <- "Trips/year contingent"
names(tidySummary)[names(tidySummary)=="pax"] <- "Passengers"
names(tidySummary)[names(tidySummary)=="routeCount"] <- "Number of unique routes"

knitr::kable(tidySummary, caption = "Summary statistics for personal operators with mapping answers, n=324")%>%
  kableExtra::kable_styling()
Summary statistics for personal operators with mapping answers, n=324
Statistic Income Age Passengers Number of unique routes Km/year Operating cost $/h Operating cost $/year Trips/year Trips/year contingent
Mean 102590 55 3 2 262 20 3429 14 14
Std.Dev 50369 12 2 2 818 20 7087 25 25
CV 0 0 1 1 3 1 2 2 2
Min 12500 22 0 1 0 0 0 1 1
Q1 62500 45 2 1 14 6 164 2 2
Median 87500 56 2 1 44 13 653 6 6
Q3 137500 65 3 3 190 27 2817 16 16
Max 225000 81 30 11 8036 100 44436 265 265
Pct.Valid 70 79 80 100 100 68 68 100 100

Boat operating cost

Tables: Annual operating cost followed by operating cost per hour by operator type

resSum <- features4%>%
  group_by(responseID, type)%>%
  summarise(YrTravelCost = sum(YrCostPre))

#Calculating descriptive stats for annual operating cost by operator type
TCByType <- resSum%>%
  group_by(type)%>%
  summarise(count = n(),
            mean = round(mean(YrTravelCost,na.rm = TRUE),digits=1),
            median = round(median(YrTravelCost,na.rm = TRUE),digits=1),
            sd = round(sd(YrTravelCost,na.rm = TRUE),digits=1),
            max = round(max(YrTravelCost,na.rm = TRUE),digits=1),
            min = round(min(YrTravelCost,na.rm = TRUE),digits=1),
            cv = round(sd/mean, digits=1))
knitr::kable(TCByType, caption = "Estimated annual operating cost by operator type")%>%
  kableExtra::kable_styling()
Estimated annual operating cost by operator type
type count mean median sd max min cv
commercial 7 12848.1 3822.2 17607.9 41060.0 41.0 1.4
government 4 6301.8 6301.8 NaN 6301.8 6301.8 NaN
personal 360 7066.4 968.2 15276.9 113662.1 0.0 2.2
hCByType <- features4%>%
  group_by(type)%>%
  summarise(count = n(),
            mean = round(mean(hCost,na.rm = TRUE),digits=1),
            median = round(median(hCost,na.rm = TRUE),digits=1),
            sd = round(sd(hCost,na.rm = TRUE),digits=1),
            max = round(max(hCost,na.rm = TRUE),digits=1),
            min = round(min(hCost,na.rm = TRUE),digits=1),
            cv = round(sd/mean, digits=1))

knitr::kable(hCByType, caption = "Stated hourly operating cost by operator type")%>%
  kableExtra::kable_styling()
Stated hourly operating cost by operator type
type count mean median sd max min cv
commercial 8 21.2 22.2 18.9 40 1.5 0.9
government 15 29.4 5.0 33.6 70 5.0 1.1
personal 741 21.0 15.0 18.5 100 0.0 0.9